rm(list=ls(all=TRUE))
graphics.off()
library(rdd)
library(xtable)
library(dplyr)

setwd() # set working directory
load("data/data_alt_boot.rdata")

#drop all from closed and split lists 
data_alt <- data_alt[which(data_alt$marg.los!=0 & data_alt$marg.win != 1 & 
                             data_alt$openlist == 1 & abs(data_alt$margsim) < 1
                           & data_alt$splitlist == 0 & 
                             data_alt$simscore > 0 & data_alt$simscore < 1),]

#if cases are extremely close they might have wrong signed simscore. Recode these to arbitrarily small simscore
data_alt$margsim <- with(data_alt, qnorm(simscore) - qnorm(marg.thres))
data_alt$margsim[data_alt$margsim <= 0 & data_alt$elected == 1] <-  0.001
data_alt$margsim[data_alt$margsim >= 0 & data_alt$elected == 0] <- -0.001

clusters           <- data.frame(unique(cbind(data_alt$year,data_alt$muncpr,data_alt$candpart)))
colnames(clusters) <- c("year", "muncpr", "candpart")
clusters$cluster   <- 1:nrow(clusters)
data_alt$year        <- as.factor(data_alt$year)
data_alt$muncpr      <- as.factor(data_alt$muncpr)
data_alt <- left_join(data_alt, clusters, by = c("year", "muncpr", "candpart"))

#subset data to remove missing on main varibles and keep only relevant variables
data_alt <- subset(data_alt, 
                   !is.na(margsim) & !is.na(electedt1) & !is.na(rerun) & !is.na(cluster),
                   select = c(margsim, electedt1, rerun, 
                              cluster, candvote, ptotvote,  
                              mtotvavo, candpart, candlino))

#create variables for belonging to main left wing parties (including Social Liberals)
#and personal votes as proportion of list votes and municipality total votes
data_alt$mainleft      <- data_alt$candpart %in% c("A", "F", "B", "Oe")
data_alt$share_of_party <- data_alt$candvote/data_alt$ptotvote
data_alt$share_of_mun  <- data_alt$candvote/data_alt$mtotvavo

##Display distribution and test for validity with McCrary
quartz(width = 12, height = 6)
par(mfrow=c(1,2), 
    mar=c(5,4,3,1),
    cex = 1.1)

#plot density as histogram
hist(data_alt$margsim, 
     main = "Distribution of forcing variable",
     xlab = "Score relative to threshold",
     breaks = 50,
     xlim = c(-4,4))
box(lwd = 1.5)
abline(v=0, lty = 2, col="grey50")

#plot density with McCrary test 
McCrary <- DCdensity(data_alt$margsim)

box(lwd = 1.5)
title(main = "McCrary's density test \n for forcing variable",
      ylab = "Density",
      sub = paste("N =",length(data_alt$margsim)),
      xlab = "Score relative to threshold")
abline(v=0, lty = 2, col="grey50")
legend("topright", 
       paste("p(McCrary) >", 
             floor(100*McCrary)/100),
       box.lwd = 0, 
       box.col = "white", 
       bg = "white")
box(lwd = 1.5)

##Estimate and plot RD
#find Imbens-Kalyanaraman bandwidth
IK.bw  <- IKbandwidth(data_alt$margsim, data_alt$electedt1)
IK.bw2 <- IKbandwidth(data_alt$margsim, data_alt$rerun)

##rerunning and elected 
rdest    <- RDestimate(electedt1 ~ margsim, data = data_alt)
rdest.cl <- RDestimate(electedt1 ~ margsim, data = data_alt, cluster = data_alt$cluster)

graphics.off()
quartz(w=8, h =4)
par(mfrow=c(1,2), 
    mar=c(4,4,2,1),
    cex = 1.1)

plot(rdest, range = c(-4,4))
title( ylab = "Pr(Rerunning and elected)", xlab = "Votes relative to threshold")
#add rugplot to illustrate point density over running var
rugz1 <- quantile(data_alt$margsim, probs = 0:100/100, na.rm = TRUE)
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

#plot for rerunning
rdest1    <- RDestimate(rerun ~ margsim, data = data_alt)
rdest1.cl <- RDestimate(rerun ~ margsim, data = data_alt, cluster = data_alt$cluster)
plot(rdest1, range = c(-4,4))
title( ylab = "Pr(Rerunning)", xlab = "Votes relative to threshold")
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

##########################################
##########################################
####### descriptive statistics ###########
##########################################
##########################################

weight <- 1 - abs(data_alt$margsim)/IK.bw
weight <- weight[weight > 0]
desc_data <- subset(data_alt, 
                    abs(data_alt$margsim)<IK.bw, 
                    select = -c(cluster, candpart))
labels <- colnames(desc_data)

weighted_means  <- matrix(NA, nrow = ncol(desc_data), ncol = 3)
for (i in 1:ncol(desc_data)){
  weighted_means[i,1] <- round(weighted.mean(desc_data[,labels[i]], w = weight),3)
  weighted_means[i,2] <- round(weighted.mean(desc_data[desc_data$margsim > 0,labels[i]], 
                                             w = weight[desc_data$margsim > 0]),3)
  weighted_means[i,3] <- round(weighted.mean(desc_data[desc_data$margsim < 0,labels[i]], 
                                             w = weight[desc_data$margsim < 0]),3)
}

colnames(weighted_means) <- c("All within bandwidth", "Winners", "Losers")
rownames(weighted_means) <- c("Simulation score",
                              "Elected at t1",
                              "Running at t1",
                              "Personal votes",
                              "Votes for party",
                              "Votes in municipality",
                              "Number on list",
                              "Left wing party",
                              "Personal share of party votes",
                              "Personal share of municipality votes")


xtable(weighted_means, 
       digits = rbind(matrix(3, nrow = 3, ncol = 4),
                      matrix(0, nrow = 3, ncol = 4),
                      matrix(3, nrow = 4, ncol = 4))       )

##Summarize jumps in table

out1 <- cbind(rdest$est,
              rdest$se,
              rdest.cl$se,
              rdest$bw,
              rdest$obs)
colnames(out1) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

out2 <- cbind(rdest1$est,
              rdest1$se,
              rdest1.cl$se,
              rdest1$bw,
              rdest1$obs)
colnames(out2) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

digits_tab <- rbind(matrix(3, nrow = 4, ncol = 7), rep(0,7))

xtable(cbind(t(out1),t(out2)), digits = digits_tab)

##Find CIs

CI1 <- c(rdest$est[1] + qnorm(0.025)*rdest$se[1],
         rdest$est[1] + qnorm(0.975)*rdest$se[1])
CI2 <- c(rdest1$est[1] + qnorm(0.025)*rdest1$se[1],
         rdest1$est[1] + qnorm(0.975)*rdest1$se[1])
